import requests
from bs4 import BeautifulSoup
import json
import os

# Boards to scrape. Each entry pairs a display title (used for log messages
# and the output file name) with the value of the `tab` query parameter.
tab_list = [
    {"title": board_title, "tab": board_tab}
    for board_title, board_tab in (
        ("热搜", "realtime"),
        ("小说", "novel"),
        ("电影", "movie"),
        ("电视剧", "teleplay"),
        ("汽车", "car"),
        ("游戏", "game"),
    )
]

def write_to_json(data, filename):
    """Write ``data`` to ``filename`` as indented UTF-8 JSON.

    Missing parent directories are created first. Non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes, and a confirmation
    message is printed once the file has been written.

    Args:
        data: Any JSON-serializable object.
        filename: Destination path; may be a bare file name with no
            directory component.

    Raises:
        OSError: If the directory or file cannot be created or written.
        TypeError: If ``data`` is not JSON-serializable.
    """
    parent_dir = os.path.dirname(filename)
    # dirname() is "" for a bare file name, and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"数据已成功写入到 {filename}")


def get_list(tab, title, timeout=10):
    """Scrape one Baidu top board and save its entries to a JSON file.

    Requests ``https://top.baidu.com/board?tab=<tab>``, collects the title
    text and link of every ``div.content_1YWBm`` element, and writes them to
    ``output/beautifulsoup/<title>.json`` as a list of
    ``{"title": ..., "link": ...}`` objects. Progress is reported with
    ``print``.

    If the request fails (connection error, timeout, or HTTP error status),
    the failure is printed and nothing is written, so an existing output
    file is not replaced with an empty list.

    Args:
        tab: Value of the ``tab`` query parameter selecting the board.
        title: Display name used in messages and as the output file name.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None.
    """
    print(f"开始获取 {title}...")
    url = f"https://top.baidu.com/board?tab={tab}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
    }
    try:
        # Without a timeout requests may block forever on a stalled server.
        response = requests.get(url, headers=headers, timeout=timeout)
        # Refuse to parse an error page as if it were the board.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"获取 {title} 失败: {e}")
        print("--------------------------------------------")
        return

    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE(review): these class names look build-generated; confirm they
    # still match the live page if no entries are found.
    items = soup.select("div.content_1YWBm")
    data_list = []

    for item in items:
        try:
            title_name = item.select_one("a.title_dIF3B div.c-single-text-ellipsis").text
            title_link = item.select_one("a.title_dIF3B")["href"]
            data_list.append({"title": title_name.strip(), "link": title_link.strip()})
        except (AttributeError, TypeError, KeyError) as e:
            # select_one() returned None (element missing) or the anchor has
            # no href: skip this entry but keep the rest of the board.
            print(f"无法获取 {title} 的部分数据: {e}")

    filename = f"output/beautifulsoup/{title}.json"
    write_to_json(data_list, filename)
    print(f"完成获取 {title}. 一共获取到 {len(data_list)} 条数据。")
    print("--------------------------------------------")


# Scrape every configured board in order, one request per board.
for board in tab_list:
    get_list(tab=board["tab"], title=board["title"])
